#加载Word2Vec的软件包
import gensim as gensim
from gensim.models import Word2Vec
from gensim.models.keyedvectors import KeyedVectors
from gensim.models.word2vec import LineSentence
import jieba
import re

# Punctuation filter, compiled ONCE instead of per-token (original re-compiled
# inside the inner loop). Pattern is kept byte-identical: it matches runs of
# ASCII punctuation/whitespace or full-width CJK punctuation.
_PUNCT_RE = re.compile("[\s+\.\!\/_,$%^*(+\"\'“”《%》]+|[+——！，。？、~@#￥%……&*（）：；‘]+")

lines = []      # tokenized sentences: one list[str] per non-empty input line
all_words = []  # flat list of every kept token across the whole corpus

# `with` guarantees the file handle is closed (the original leaked it).
with open("swarma_article_small.txt", 'r', encoding='utf-8') as f:
    for line in f:
        words = []
        for token in jieba.lcut(line):
            # Strip all punctuation from the segmented token.
            token = _PUNCT_RE.sub("", token)
            if token:  # drop tokens that were pure punctuation
                words.append(token)
        if words:  # skip lines that contained no real words
            lines.append(words)
            all_words.extend(words)

# Train a small demo Word2Vec model on the tokenized corpus, then print the
# 20 words whose embeddings are most similar to '物理' ("physics").
model = Word2Vec(
    lines,
    vector_size=20,  # embedding dimensionality (small: demo-sized corpus)
    window=2,        # context words considered on each side
    min_count=0,     # keep every token, even ones that occur once
)
print(model.wv.most_similar('物理', topn=20))